This Jupyter Notebook describes all the code required to the Asset pricing workshop 2
# Importing packages required
import pandas as pd
import numpy as np
import pyreadstat
import pyarrow.feather as feather
import datetime
from datetime import date
import math
import scipy.stats
import statsmodels.regression.linear_model as lm
import statsmodels.tools.tools as ct
pd.options.display.float_format = '{:,.4f}'.format # Reduce decimal points to 4
pd.pandas.set_option('display.max_columns', None)
# Visualisation
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
# Reading the database with the ETF and KF Factors values
df = pd.read_excel('DATOS Taller 2 AP 2022.xlsx', sheet_name='ETF_DB',index_col=0)
# df = pd.read_excel('C:/Users/Miguel Angel/OneDrive - Universidad EAFIT/2022-1/Asset pricing/Workshop/Workshop 2/DATOS Taller 2 AP 2022.xlsx', sheet_name='ETF_DB',index_col=0)
# Data exploration - visualization
df_plot = df.reset_index()
df_plot2=pd.melt(df_plot, id_vars =['DATE'], value_vars =['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU','SECT', 'VUG', 'LRGF', 'ERP', 'SMB', 'HML', 'RF', 'MOM'])
fig = px.line(df_plot2, x="DATE", y="value", color='variable', title='ETFs values')
fig.show()
df_plot3=pd.melt(df_plot, id_vars =['DATE'], value_vars =['ERP', 'SMB', 'HML', 'RF', 'MOM'])
fig2 = px.line(df_plot3, x="DATE", y="value", color='variable', title='Factors values', color_discrete_sequence=px.colors.qualitative.Dark2)
fig2.show()
# To drop objets used
del [df_plot, fig, fig2, df_plot2, df_plot3]
# Database structure
df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 121 entries, 2012-02-29 to 2022-02-28 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VTV 121 non-null float64 1 VGT 121 non-null float64 2 VB 121 non-null float64 3 USMV 121 non-null float64 4 PNQI 121 non-null float64 5 MTUM 107 non-null float64 6 XBI 121 non-null float64 7 SPHQ 121 non-null float64 8 ESGU 63 non-null float64 9 SECT 54 non-null float64 10 VUG 121 non-null float64 11 LRGF 83 non-null float64 12 ERP 121 non-null float64 13 SMB 121 non-null float64 14 HML 121 non-null float64 15 RF 121 non-null float64 16 MOM 121 non-null float64 dtypes: float64(17) memory usage: 17.0 KB
It is noticed that for some variables there few missing values such as: MTUM, ESGU, SECT, and LRGF.
# Database main stats
df.describe()
| VTV | VGT | VB | USMV | PNQI | MTUM | XBI | SPHQ | ESGU | SECT | VUG | LRGF | ERP | SMB | HML | RF | MOM | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 121.0000 | 121.0000 | 121.0000 | 121.0000 | 121.0000 | 107.0000 | 121.0000 | 121.0000 | 63.0000 | 54.0000 | 121.0000 | 83.0000 | 121.0000 | 121.0000 | 121.0000 | 121.0000 | 121.0000 |
| mean | 94.2680 | 173.7803 | 135.7637 | 49.2444 | 112.0500 | 101.6284 | 74.4656 | 28.4374 | 70.4888 | 31.7255 | 143.0460 | 31.5708 | 0.0119 | -0.0008 | -0.0007 | 0.0004 | 0.0020 |
| std | 22.9167 | 105.7000 | 39.2722 | 14.0322 | 60.6486 | 38.0144 | 30.7528 | 9.6530 | 16.9232 | 5.8871 | 66.0860 | 6.1456 | 0.0398 | 0.0265 | 0.0326 | 0.0007 | 0.0356 |
| min | 53.7700 | 66.7500 | 72.6800 | 27.6380 | 37.7999 | 51.3700 | 25.6200 | 14.7400 | 49.1260 | 24.2900 | 66.3100 | 22.6100 | -0.1338 | -0.0832 | -0.1402 | 0.0000 | -0.1245 |
| 25% | 79.2800 | 100.0000 | 111.2400 | 37.6800 | 68.2407 | 70.3999 | 51.3300 | 21.5800 | 58.3672 | 27.6383 | 99.6000 | 26.5850 | -0.0019 | -0.0189 | -0.0183 | 0.0000 | -0.0202 |
| 50% | 93.4700 | 133.0000 | 130.9300 | 47.7200 | 92.3100 | 95.6800 | 74.0300 | 27.3300 | 64.5300 | 29.0763 | 120.4700 | 31.2500 | 0.0143 | 0.0016 | -0.0038 | 0.0001 | 0.0036 |
| 75% | 107.7600 | 213.5600 | 154.6100 | 60.6300 | 137.7653 | 120.8150 | 90.5400 | 33.2200 | 81.4200 | 35.3800 | 166.1800 | 33.7900 | 0.0319 | 0.0140 | 0.0130 | 0.0009 | 0.0216 |
| max | 147.1100 | 458.1700 | 229.4900 | 80.9000 | 256.6500 | 190.6800 | 152.8800 | 53.2100 | 107.9000 | 44.8700 | 320.9000 | 46.5600 | 0.1365 | 0.0706 | 0.1279 | 0.0021 | 0.1006 |
The ETF with higher standard deviation is the VGT.
# To calculate the monthly growth
returns = df.pct_change()
# Database returns main stats
returns.describe()
| VTV | VGT | VB | USMV | PNQI | MTUM | XBI | SPHQ | ESGU | SECT | VUG | LRGF | ERP | SMB | HML | RF | MOM | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 120.0000 | 120.0000 | 120.0000 | 120.0000 | 120.0000 | 106.0000 | 120.0000 | 120.0000 | 62.0000 | 53.0000 | 120.0000 | 82.0000 | 120.0000 | 120.0000 | 120.0000 | 72.0000 | 120.0000 |
| mean | 0.0086 | 0.0157 | 0.0096 | 0.0087 | 0.0139 | 0.0116 | 0.0142 | 0.0107 | 0.0123 | 0.0102 | 0.0126 | 0.0078 | -2.8162 | -5.3380 | -0.4014 | inf | -0.1945 |
| std | 0.0387 | 0.0477 | 0.0487 | 0.0323 | 0.0564 | 0.0413 | 0.0854 | 0.0357 | 0.0459 | 0.0477 | 0.0427 | 0.0438 | 20.0015 | 38.6725 | 19.7084 | NaN | 17.7556 |
| min | -0.1558 | -0.1008 | -0.2224 | -0.1185 | -0.1169 | -0.1193 | -0.2799 | -0.0990 | -0.1296 | -0.1272 | -0.1083 | -0.1555 | -129.0000 | -417.0000 | -46.3333 | -1.0000 | -69.6667 |
| 25% | -0.0112 | -0.0126 | -0.0107 | -0.0099 | -0.0197 | -0.0112 | -0.0396 | -0.0089 | -0.0007 | -0.0111 | -0.0109 | -0.0107 | -1.8064 | -2.0933 | -1.9207 | -0.0132 | -2.0419 |
| 50% | 0.0125 | 0.0204 | 0.0126 | 0.0115 | 0.0176 | 0.0144 | 0.0142 | 0.0129 | 0.0171 | 0.0180 | 0.0163 | 0.0113 | -0.8480 | -1.0913 | -0.9273 | 0.0000 | -1.0816 |
| 75% | 0.0298 | 0.0448 | 0.0368 | 0.0312 | 0.0467 | 0.0368 | 0.0764 | 0.0336 | 0.0339 | 0.0375 | 0.0360 | 0.0313 | 0.1081 | -0.0290 | -0.1185 | 0.1295 | 0.0840 |
| max | 0.1276 | 0.1420 | 0.1578 | 0.0937 | 0.1667 | 0.1170 | 0.2060 | 0.1226 | 0.1329 | 0.1195 | 0.1508 | 0.1153 | 72.9091 | 14.7241 | 201.0000 | inf | 152.5000 |
The ETF with the higher simple return is VGT with 0.015731 (1.573%). And the ETF with the higher standard deviation is XBI with 0.0853.
# To create an index based on montlhy growth
df_index = returns.pct_change().fillna(0) \
.add(1).cumprod().mul(100).reset_index()
df_indexfactor = df.add(1).cumprod().mul(100).reset_index()
# Data exploratory - visualization of indexes base 100
df_index_plot = df_index.reset_index()
df_index_plot=pd.melt(df_index_plot, id_vars =['DATE'], value_vars =['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU','SECT', 'VUG', 'LRGF'])
fig3 = px.line(df_index_plot, x="DATE", y="value", color='variable', title='ETFs growth index (base 100)', color_discrete_sequence=px.colors.qualitative.Bold)
fig3.show()
df_indexfactor_plot = df_indexfactor.reset_index()
df_indexfactor_plot=pd.melt(df_indexfactor_plot, id_vars =['DATE'], value_vars =['ERP', 'SMB', 'HML', 'RF', 'MOM'])
fig4 = px.line(df_indexfactor_plot, x="DATE", y="value", color='variable', title='Factors growth index (base 100)', color_discrete_sequence=px.colors.qualitative.Dark2)
fig4.show()
# To drop objets used
del [df_index_plot, df_index, fig3, df_indexfactor, df_indexfactor_plot, fig4, returns]
# Calculating the log returns for every variable in the dataset
logreturns = np.log(df) - np.log(df.shift(1))
# lnreturns = np.log1p(df.pct_change())
# To replace the columns avoiding the returns calculation for factors
columns=['ERP', 'SMB', 'HML', 'RF', 'MOM']
logreturns[columns] = df[columns]
del [columns] # To drop the aux object
logreturns = logreturns.iloc[1: , :] # To drop the first row of the dataframe
# Database returns main stats
logreturns.describe()
| VTV | VGT | VB | USMV | PNQI | MTUM | XBI | SPHQ | ESGU | SECT | VUG | LRGF | ERP | SMB | HML | RF | MOM | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 120.0000 | 120.0000 | 120.0000 | 120.0000 | 120.0000 | 106.0000 | 120.0000 | 120.0000 | 62.0000 | 53.0000 | 120.0000 | 82.0000 | 120.0000 | 120.0000 | 120.0000 | 120.0000 | 120.0000 |
| mean | 0.0078 | 0.0145 | 0.0084 | 0.0082 | 0.0122 | 0.0107 | 0.0105 | 0.0100 | 0.0112 | 0.0091 | 0.0116 | 0.0068 | 0.0117 | -0.0006 | -0.0007 | 0.0005 | 0.0021 |
| std | 0.0390 | 0.0473 | 0.0495 | 0.0324 | 0.0559 | 0.0412 | 0.0864 | 0.0356 | 0.0460 | 0.0481 | 0.0425 | 0.0443 | 0.0399 | 0.0266 | 0.0327 | 0.0007 | 0.0358 |
| min | -0.1694 | -0.1062 | -0.2515 | -0.1261 | -0.1244 | -0.1270 | -0.3284 | -0.1043 | -0.1388 | -0.1360 | -0.1146 | -0.1690 | -0.1338 | -0.0832 | -0.1402 | 0.0000 | -0.1245 |
| 25% | -0.0112 | -0.0127 | -0.0107 | -0.0100 | -0.0199 | -0.0113 | -0.0404 | -0.0089 | -0.0007 | -0.0112 | -0.0110 | -0.0108 | -0.0036 | -0.0191 | -0.0184 | 0.0000 | -0.0203 |
| 50% | 0.0124 | 0.0202 | 0.0125 | 0.0114 | 0.0174 | 0.0143 | 0.0141 | 0.0128 | 0.0169 | 0.0178 | 0.0161 | 0.0112 | 0.0140 | 0.0021 | -0.0039 | 0.0001 | 0.0038 |
| 75% | 0.0294 | 0.0438 | 0.0361 | 0.0307 | 0.0456 | 0.0361 | 0.0736 | 0.0331 | 0.0333 | 0.0368 | 0.0353 | 0.0309 | 0.0315 | 0.0144 | 0.0135 | 0.0009 | 0.0218 |
| max | 0.1201 | 0.1328 | 0.1465 | 0.0896 | 0.1542 | 0.1107 | 0.1873 | 0.1157 | 0.1248 | 0.1129 | 0.1405 | 0.1091 | 0.1365 | 0.0706 | 0.1279 | 0.0021 | 0.1006 |
The ETF with the higher log return is VGT also with 0.014505 (1.4505%). And the ETF with the higher standard deviation is XBI also with 0.086356.
# Returns Data exploratory - visualization
df_plot = logreturns.reset_index()
df_plot2=pd.melt(df_plot, id_vars =['DATE'], value_vars =['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU','SECT', 'VUG', 'LRGF', 'ERP', 'SMB', 'HML', 'RF', 'MOM'])
fig = px.line(df_plot2, x="DATE", y="value", color='variable', title='ETFs log return values', color_discrete_sequence=px.colors.qualitative.Safe)
fig.show()
df_plot3=pd.melt(df_plot, id_vars =['DATE'], value_vars =['ERP', 'SMB', 'HML', 'RF', 'MOM'])
fig2 = px.line(df_plot3, x="DATE", y="value", color='variable', title='Factors log values', color_discrete_sequence=px.colors.qualitative.Vivid)
fig2.show()
# To drop objets used
del [df_plot, fig, fig2, df_plot2, df_plot3]
# Data exploratory - visualization of return indexes base 100
df_indexfactor = logreturns.add(1).cumprod().mul(100).reset_index()
df_index_plot = df_indexfactor.reset_index()
df_index_plot=pd.melt(df_index_plot, id_vars =['DATE'], value_vars =['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU','SECT', 'VUG', 'LRGF', 'ERP', 'SMB', 'HML', 'RF', 'MOM'])
fig3 = px.line(df_index_plot, x="DATE", y="value", color='variable', title='ETFs and Factors log return index (base 100)')
fig3.show()
# To drop objets used
del [df_index_plot, fig3, df_indexfactor]
1.1. To determine the statically significance of factors
# To calculate the mean by column
logreturns_mean=logreturns.mean(axis = 0, skipna = True)
# To calculate the std by column
logreturns_std=logreturns.std(axis = 0, skipna = True)
# To calculate the Z value by column
z_value = (logreturns_mean*math.sqrt(len(df.columns))/logreturns_std)
# It is created a dataframe to get all the information in one object
frame = {'z_value': z_value, 'mean': logreturns_mean, 'std': logreturns_std}
result = pd.DataFrame(frame)
# Find p-value
z_pvalue = pd.DataFrame(scipy.stats.norm.sf(abs(result.z_value))*2)
result.reset_index(inplace=True)
result['zpvalue'] = z_pvalue # To add the p value of the Z score for every factor (including the ETFs)
result
| index | z_value | mean | std | zpvalue | |
|---|---|---|---|---|---|
| 0 | VTV | 0.8247 | 0.0078 | 0.0390 | 0.4096 |
| 1 | VGT | 1.2651 | 0.0145 | 0.0473 | 0.2058 |
| 2 | VB | 0.6964 | 0.0084 | 0.0495 | 0.4862 |
| 3 | USMV | 1.0403 | 0.0082 | 0.0324 | 0.2982 |
| 4 | PNQI | 0.9012 | 0.0122 | 0.0559 | 0.3675 |
| 5 | MTUM | 1.0690 | 0.0107 | 0.0412 | 0.2851 |
| 6 | XBI | 0.4992 | 0.0105 | 0.0864 | 0.6177 |
| 7 | SPHQ | 1.1575 | 0.0100 | 0.0356 | 0.2471 |
| 8 | ESGU | 1.0017 | 0.0112 | 0.0460 | 0.3165 |
| 9 | SECT | 0.7771 | 0.0091 | 0.0481 | 0.4371 |
| 10 | VUG | 1.1276 | 0.0116 | 0.0425 | 0.2595 |
| 11 | LRGF | 0.6325 | 0.0068 | 0.0443 | 0.5271 |
| 12 | ERP | 1.2057 | 0.0117 | 0.0399 | 0.2279 |
| 13 | SMB | -0.0957 | -0.0006 | 0.0266 | 0.9237 |
| 14 | HML | -0.0917 | -0.0007 | 0.0327 | 0.9270 |
| 15 | RF | 2.8227 | 0.0005 | 0.0007 | 0.0048 |
| 16 | MOM | 0.2410 | 0.0021 | 0.0358 | 0.8095 |
According to the p value observed in the last table the factors ERP, SMB, HML and MOM showed to be statically different from zero. Which means that all the factors mentioned could give additional information about the ETF returns. Also, there is evidence that the returns provident by the ETF are statistically different from zero.
1.2. To calculate the returns in excess from each ETF.
# Calculating the return in excess for each ETF
logreturns['VTVn'] =logreturns['VTV'] -logreturns['RF']
logreturns['VGTn'] =logreturns['VGT'] -logreturns['RF']
logreturns['VBn'] =logreturns['VB'] -logreturns['RF']
logreturns['USMVn'] =logreturns['USMV'] -logreturns['RF']
logreturns['PNQIn'] =logreturns['PNQI'] -logreturns['RF']
logreturns['MTUMn'] =logreturns['MTUM'] -logreturns['RF']
logreturns['XBIn'] =logreturns['XBI'] -logreturns['RF']
logreturns['SPHQn'] =logreturns['SPHQ'] -logreturns['RF']
logreturns['ESGUn'] =logreturns['ESGU'] -logreturns['RF']
logreturns['SECTn'] =logreturns['SECT'] -logreturns['RF']
logreturns['VUGn'] =logreturns['VUG'] -logreturns['RF']
logreturns['LRGFn'] =logreturns['LRGF'] -logreturns['RF']
# Data exploratory - visualization of return indexes base 100
df_indexfactor = logreturns.add(1).cumprod().mul(100).reset_index()
df_index_plot = df_indexfactor.reset_index()
df_index_plot2=pd.melt(df_index_plot, id_vars =['DATE'], value_vars =['VTVn', 'VGTn', 'VBn', 'USMVn', 'PNQIn', 'MTUMn', 'XBIn', 'SPHQn', 'ESGUn', 'SECTn', 'VUGn', 'LRGFn'])
fig3 = px.line(df_index_plot2, x="DATE", y="value", color='variable', title='ETFs return in excess index (base 100)')
fig3.show()
df_index_plot3=pd.melt(df_index_plot, id_vars =['DATE'], value_vars =['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU','SECT', 'VUG', 'LRGF'])
fig4 = px.line(df_index_plot3, x="DATE", y="value", color='variable', title='ETFs return index (base 100)')
fig4.show()
# To drop objets used
del [logreturns_mean, logreturns_std, z_pvalue, z_value, df_index_plot, fig3, df_indexfactor, fig4, df_index_plot2, df_index_plot3, frame]
1.3. Evaluating the ETF performance
# To calculate the mean by column
logreturns_mean=logreturns.mean(axis = 0, skipna = True)
# To calculate the std by column
logreturns_std=logreturns.std(axis = 0, skipna = True)
# It is created a dataframe to get all the information in one object
frame = {'ER': logreturns_mean, 'SIGMA': logreturns_std} # Expected returns (EP)
result = pd.DataFrame(frame)
result['SHARPE'] = result['ER'] / result['SIGMA']
result
| ER | SIGMA | SHARPE | |
|---|---|---|---|
| VTV | 0.0078 | 0.0390 | 0.2000 |
| VGT | 0.0145 | 0.0473 | 0.3068 |
| VB | 0.0084 | 0.0495 | 0.1689 |
| USMV | 0.0082 | 0.0324 | 0.2523 |
| PNQI | 0.0122 | 0.0559 | 0.2186 |
| MTUM | 0.0107 | 0.0412 | 0.2593 |
| XBI | 0.0105 | 0.0864 | 0.1211 |
| SPHQ | 0.0100 | 0.0356 | 0.2807 |
| ESGU | 0.0112 | 0.0460 | 0.2430 |
| SECT | 0.0091 | 0.0481 | 0.1885 |
| VUG | 0.0116 | 0.0425 | 0.2735 |
| LRGF | 0.0068 | 0.0443 | 0.1534 |
| ERP | 0.0117 | 0.0399 | 0.2924 |
| SMB | -0.0006 | 0.0266 | -0.0232 |
| HML | -0.0007 | 0.0327 | -0.0222 |
| RF | 0.0005 | 0.0007 | 0.6846 |
| MOM | 0.0021 | 0.0358 | 0.0585 |
| VTVn | 0.0073 | 0.0391 | 0.1879 |
| VGTn | 0.0141 | 0.0473 | 0.2971 |
| VBn | 0.0079 | 0.0496 | 0.1594 |
| USMVn | 0.0077 | 0.0325 | 0.2379 |
| PNQIn | 0.0118 | 0.0560 | 0.2103 |
| MTUMn | 0.0102 | 0.0413 | 0.2466 |
| XBIn | 0.0100 | 0.0864 | 0.1157 |
| SPHQn | 0.0095 | 0.0357 | 0.2673 |
| ESGUn | 0.0103 | 0.0461 | 0.2241 |
| SECTn | 0.0082 | 0.0483 | 0.1697 |
| VUGn | 0.0112 | 0.0425 | 0.2625 |
| LRGFn | 0.0061 | 0.0444 | 0.1382 |
# In the last table it is shown that the Expected RF in equal to 0.000453 (row 15 and column 1)
result['RF'] = result.iloc[15]['ER']
# result['RF'] = 0.0004533333333333336
# To get all the Jessen's Alphas into a data frame it is created an empty dataframe
jalpha_df = pd.DataFrame()
# List of ETF
ETF_list = ['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU', 'SECT', 'VUG', 'LRGF']
# list of models
models = []
for resp in ETF_list:
formula = resp + " ~ ERP"
models.append(lm.OLS.from_formula(formula, data = logreturns, hasconst=bool).fit())
# To extrac every parameter from models
jalpha_df['VTV']=models[0].params
jalpha_df['VGT']=models[1].params
jalpha_df['VB']=models[2].params
jalpha_df['USMV']=models[3].params
jalpha_df['PNQI']=models[4].params
jalpha_df['MTUM']=models[5].params
jalpha_df['XBI']=models[6].params
jalpha_df['SPHQ']=models[7].params
jalpha_df['ESGU']=models[8].params
jalpha_df['SECT']=models[9].params
jalpha_df['VUG']=models[10].params
jalpha_df['LRGF']=models[11].params
# print(models[11].summary())
# To get all the Jessen's Alphas pvalues into a data frame it is created an empty dataframe
pjalpha_df = pd.DataFrame()
# To extrac every parameter p value from models
pjalpha_df['VTV']=models[0].pvalues
pjalpha_df['VGT']=models[1].pvalues
pjalpha_df['VB']=models[2].pvalues
pjalpha_df['USMV']=models[3].pvalues
pjalpha_df['PNQI']=models[4].pvalues
pjalpha_df['MTUM']=models[5].pvalues
pjalpha_df['XBI']=models[6].pvalues
pjalpha_df['SPHQ']=models[7].pvalues
pjalpha_df['ESGU']=models[8].pvalues
pjalpha_df['SECT']=models[9].pvalues
pjalpha_df['VUG']=models[10].pvalues
pjalpha_df['LRGF']=models[11].pvalues
# To transpose the dataset
pjalpha_df = pjalpha_df.T
pjalpha_df.rename(columns = {'Intercept':'ALPHA_CAPM', 'ERP':'BETA_CAPM'}, inplace = True) # To rename the columns
# To get all the Jessen's Alphas R2 into a data frame it is created an empty dataframe
# To extrac every R2 from models
# Initializing list of lists
data = [['VTV',models[0].rsquared], ['VGT',models[1].rsquared], ['VB',models[2].rsquared], ['USMV',models[3].rsquared], ['PNQI',models[4].rsquared], ['MTUM',models[5].rsquared], ['XBI',models[6].rsquared], ['SPHQ',models[7].rsquared], ['ESGU',models[8].rsquared], ['SECT',models[9].rsquared], ['VUG',models[10].rsquared], ['LRGF',models[11].rsquared]]
# Creating the pandas DataFrame with the R2 for every model
models_r2 = pd.DataFrame(data, columns = ['index', 'CAPM'])
del [data]
# To get all the Jessen's Alphas R2 into a data frame it is created an empty dataframe
# To extrac every R2 from models
# Initializing list of lists
data = [['VTV',models[0].rsquared_adj], ['VGT',models[1].rsquared_adj], ['VB',models[2].rsquared_adj], ['USMV',models[3].rsquared_adj], ['PNQI',models[4].rsquared_adj], ['MTUM',models[5].rsquared_adj], ['XBI',models[6].rsquared_adj], ['SPHQ',models[7].rsquared_adj], ['ESGU',models[8].rsquared_adj], ['SECT',models[9].rsquared_adj], ['VUG',models[10].rsquared_adj], ['LRGF',models[11].rsquared_adj]]
# Creating the pandas DataFrame with the R2 for every model
models_r2a = pd.DataFrame(data, columns = ['index', 'CAPM'])
del [data]
# To clean the memory
del[formula, frame, models, resp, ETF_list, logreturns_std, logreturns_mean]
# To transpose the dataset
jalpha_df = jalpha_df.T
jalpha_df.rename(columns = {'Intercept':'ALPHA_CAPM', 'ERP':'BETA_CAPM'}, inplace = True) # To rename the columns
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
result.reset_index(inplace=True)
jalpha_df.reset_index(inplace=True)
result = pd.merge(result, jalpha_df, how='left', left_on=['index'], right_on=['index'])
result = result.dropna(subset=['ALPHA_CAPM']) #Droping NA in the CAPM column
del [jalpha_df]
Treynor ratio
# Calculating the Treynor ratio
result['TR'] = (result['ER'] - result['RF']) / result['BETA_CAPM']
Fama and French 3 factors model
# To get all the Jessen's Alphas from the 3 factors FF model into a data frame it is created an empty dataframe
jalpha_df = pd.DataFrame()
# List of ETF
ETF_list = ['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU', 'SECT', 'VUG', 'LRGF']
# list of models
models = []
for resp in ETF_list:
formula = resp + " ~ ERP + SMB + HML"
models.append(lm.OLS.from_formula(formula, data = logreturns, hasconst=bool).fit())
# To extrac every parameter from models
jalpha_df['VTV']=models[0].params
jalpha_df['VGT']=models[1].params
jalpha_df['VB']=models[2].params
jalpha_df['USMV']=models[3].params
jalpha_df['PNQI']=models[4].params
jalpha_df['MTUM']=models[5].params
jalpha_df['XBI']=models[6].params
jalpha_df['SPHQ']=models[7].params
jalpha_df['ESGU']=models[8].params
jalpha_df['SECT']=models[9].params
jalpha_df['VUG']=models[10].params
jalpha_df['LRGF']=models[11].params
# print(models[11].summary())
# To reshape DataFrame from wide format to long format
jalpha_df.reset_index(inplace=True)
jalpha_df.drop([1,2,3], axis=0, inplace=True)
jalpha_df = pd.melt(jalpha_df, id_vars='index', value_vars=['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU', 'SECT', 'VUG', 'LRGF'])
del jalpha_df['index']
jalpha_df.rename(columns = {'value':'3FF', 'variable':'index'}, inplace = True) # To rename the columns
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
result = pd.merge(result, jalpha_df, how='left', left_on=['index'], right_on=['index'])
# To get all the Jessen's Alphas pvalues into a data frame it is created an empty dataframe
pjalpha_df2 = pd.DataFrame()
# To extrac every parameter p value from models
pjalpha_df2['VTV']=models[0].pvalues
pjalpha_df2['VGT']=models[1].pvalues
pjalpha_df2['VB']=models[2].pvalues
pjalpha_df2['USMV']=models[3].pvalues
pjalpha_df2['PNQI']=models[4].pvalues
pjalpha_df2['MTUM']=models[5].pvalues
pjalpha_df2['XBI']=models[6].pvalues
pjalpha_df2['SPHQ']=models[7].pvalues
pjalpha_df2['ESGU']=models[8].pvalues
pjalpha_df2['SECT']=models[9].pvalues
pjalpha_df2['VUG']=models[10].pvalues
pjalpha_df2['LRGF']=models[11].pvalues
# To transpose the dataset
pjalpha_df2 = pjalpha_df2.T
pjalpha_df2.rename(columns = {'Intercept':'ALPHA_3FF', 'ERP':'BETA_3FF', 'SMB':'SMB_3FF', 'HML':'HML_3FF'}, inplace = True) # To rename the columns
pjalpha_df.reset_index(inplace=True)
pjalpha_df2.reset_index(inplace=True)
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
pjalpha_df3 = pd.merge(pjalpha_df, pjalpha_df2, how='outer', left_on=['index'], right_on=['index'])
# To get all the Jessen's Alphas R2 into a data frame it is created an empty dataframe
# To extrac every R2 from models
# Initializing list of lists
data = [['VTV',models[0].rsquared], ['VGT',models[1].rsquared], ['VB',models[2].rsquared], ['USMV',models[3].rsquared], ['PNQI',models[4].rsquared], ['MTUM',models[5].rsquared], ['XBI',models[6].rsquared], ['SPHQ',models[7].rsquared], ['ESGU',models[8].rsquared], ['SECT',models[9].rsquared], ['VUG',models[10].rsquared], ['LRGF',models[11].rsquared]]
# Creating the pandas DataFrame with the R2 for every model
models3FF_r2 = pd.DataFrame(data, columns = ['index', '3FF'])
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
models_r2 = pd.merge(models_r2, models3FF_r2, how='outer', left_on=['index'], right_on=['index'])
# To get all the Jessen's Alphas R2 into a data frame it is created an empty dataframe
# To extrac every R2 from models
# Initializing list of lists
data = [['VTV',models[0].rsquared_adj], ['VGT',models[1].rsquared_adj], ['VB',models[2].rsquared_adj], ['USMV',models[3].rsquared_adj], ['PNQI',models[4].rsquared_adj], ['MTUM',models[5].rsquared_adj], ['XBI',models[6].rsquared_adj], ['SPHQ',models[7].rsquared_adj], ['ESGU',models[8].rsquared_adj], ['SECT',models[9].rsquared_adj], ['VUG',models[10].rsquared_adj], ['LRGF',models[11].rsquared_adj]]
# Creating the pandas DataFrame with the R2 for every model
models3FF_r2a = pd.DataFrame(data, columns = ['index', '3FF'])
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
models_r2a = pd.merge(models_r2a, models3FF_r2a, how='outer', left_on=['index'], right_on=['index'])
# To clean the memory
del[data, pjalpha_df, pjalpha_df2, models3FF_r2, jalpha_df, models, resp, ETF_list, formula]
Fama and French 3 factors model including momentum
# To get all the Jessen's Alphas from the 3 factors + MOM FF model into a data frame it is created an empty dataframe
jalpha_df = pd.DataFrame()
# List of ETF
ETF_list = ['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU', 'SECT', 'VUG', 'LRGF']
# list of models
models = []
for resp in ETF_list:
formula = resp + " ~ ERP + SMB + HML + MOM"
models.append(lm.OLS.from_formula(formula, data = logreturns, hasconst=bool).fit())
# To extrac every parameter from models
jalpha_df['VTV']=models[0].params
jalpha_df['VGT']=models[1].params
jalpha_df['VB']=models[2].params
jalpha_df['USMV']=models[3].params
jalpha_df['PNQI']=models[4].params
jalpha_df['MTUM']=models[5].params
jalpha_df['XBI']=models[6].params
jalpha_df['SPHQ']=models[7].params
jalpha_df['ESGU']=models[8].params
jalpha_df['SECT']=models[9].params
jalpha_df['VUG']=models[10].params
jalpha_df['LRGF']=models[11].params
# print(models[11].summary())
# To reshape DataFrame from wide format to long format
jalpha_df.reset_index(inplace=True)
jalpha_df.drop([1,2,3,4], axis=0, inplace=True)
jalpha_df = pd.melt(jalpha_df, id_vars='index', value_vars=['VTV', 'VGT', 'VB', 'USMV', 'PNQI', 'MTUM', 'XBI', 'SPHQ', 'ESGU', 'SECT', 'VUG', 'LRGF'])
del jalpha_df['index']
jalpha_df.rename(columns = {'value':'3MFF', 'variable':'index'}, inplace = True) # To rename the columns
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
result = pd.merge(result, jalpha_df, how='left', left_on=['index'], right_on=['index'])
# To get all the Jessen's Alphas pvalues into a data frame it is created an empty dataframe
pjalpha_df2 = pd.DataFrame()
# To extrac every parameter p value from models
pjalpha_df2['VTV']=models[0].pvalues
pjalpha_df2['VGT']=models[1].pvalues
pjalpha_df2['VB']=models[2].pvalues
pjalpha_df2['USMV']=models[3].pvalues
pjalpha_df2['PNQI']=models[4].pvalues
pjalpha_df2['MTUM']=models[5].pvalues
pjalpha_df2['XBI']=models[6].pvalues
pjalpha_df2['SPHQ']=models[7].pvalues
pjalpha_df2['ESGU']=models[8].pvalues
pjalpha_df2['SECT']=models[9].pvalues
pjalpha_df2['VUG']=models[10].pvalues
pjalpha_df2['LRGF']=models[11].pvalues
# To transpose the dataset
pjalpha_df2 = pjalpha_df2.T
pjalpha_df2.rename(columns = {'Intercept':'ALPHA_3MFF', 'ERP':'BETA_3MFF', 'SMB':'SMB_3MFF', 'HML':'HML_3MFF', 'MOM':'MOM_3MFF'}, inplace = True) # To rename the columns
pjalpha_df2.reset_index(inplace=True)
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
pjalpha_df = pd.merge(pjalpha_df3, pjalpha_df2, how='outer', left_on=['index'], right_on=['index'])
del [pjalpha_df3, pjalpha_df2]
# To get all the Jessen's Alphas R2 into a data frame it is created an empty dataframe
# To extrac every R2 from models
# Initializing list of lists
data = [['VTV',models[0].rsquared], ['VGT',models[1].rsquared], ['VB',models[2].rsquared], ['USMV',models[3].rsquared], ['PNQI',models[4].rsquared], ['MTUM',models[5].rsquared], ['XBI',models[6].rsquared], ['SPHQ',models[7].rsquared], ['ESGU',models[8].rsquared], ['SECT',models[9].rsquared], ['VUG',models[10].rsquared], ['LRGF',models[11].rsquared]]
# Creating the pandas DataFrame with the R2 for every model
models3FF_r2 = pd.DataFrame(data, columns = ['index', '3MFF'])
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
models_r2 = pd.merge(models_r2, models3FF_r2, how='outer', left_on=['index'], right_on=['index'])
# To get all the Jessen's Alphas R2 into a data frame it is created an empty dataframe
# To extrac every R2 from models
# Initializing list of lists
data = [['VTV',models[0].rsquared_adj], ['VGT',models[1].rsquared_adj], ['VB',models[2].rsquared_adj], ['USMV',models[3].rsquared_adj], ['PNQI',models[4].rsquared_adj], ['MTUM',models[5].rsquared_adj], ['XBI',models[6].rsquared_adj], ['SPHQ',models[7].rsquared_adj], ['ESGU',models[8].rsquared_adj], ['SECT',models[9].rsquared_adj], ['VUG',models[10].rsquared_adj], ['LRGF',models[11].rsquared_adj]]
# Creating the pandas DataFrame with the R2 for every model
models3FF_r2a = pd.DataFrame(data, columns = ['index', '3MFF'])
# Joining the result dataframe and the Jesen's Alpha dataframe to have a sinlge dataframe of results
models_r2a = pd.merge(models_r2a, models3FF_r2a, how='outer', left_on=['index'], right_on=['index'])
# To clean the memory
del[jalpha_df, models, resp, ETF_list, formula]
# Performance measures - visualization
fig = px.bar(result, x="index", y="SHARPE", title='Sharpe ratio for ETF')
fig.show()
fig3 = px.bar(result, x="index", y="TR", title='Treynor ratio for ETF', color_discrete_sequence=px.colors.qualitative.Pastel1)
fig3.show()
fig2 = px.bar(result, x="index", y="ALPHA_CAPM", title='Jensen Alpha for ETF', color_discrete_sequence=px.colors.qualitative.Set3)
fig2.show()
fig4 = px.bar(result, x="index", y="3FF", title='Fama and French 3 factors Alpha for ETF', color_discrete_sequence=px.colors.qualitative.Set1)
fig4.show()
fig5 = px.bar(result, x="index", y="3MFF", title='Fama and French 3 factors + momentum Alpha for ETF', color_discrete_sequence=px.colors.qualitative.Dark24)
fig5.show()
# To analize the pvalues for all the estimators from models
pjalpha_df
| index | ALPHA_CAPM | BETA_CAPM | ALPHA_3FF | BETA_3FF | SMB_3FF | HML_3FF | ALPHA_3MFF | BETA_3MFF | SMB_3MFF | HML_3MFF | MOM_3MFF | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | VTV | 0.0244 | 0.0000 | 0.0002 | 0.0000 | 0.0000 | 0.0000 | 0.0004 | 0.0000 | 0.0000 | 0.0000 | 0.6715 |
| 1 | VGT | 0.2929 | 0.0000 | 0.4940 | 0.0000 | 0.0151 | 0.0000 | 0.4945 | 0.0000 | 0.0159 | 0.0000 | 0.9435 |
| 2 | VB | 0.0035 | 0.0000 | 0.0012 | 0.0000 | 0.0000 | 0.0171 | 0.0004 | 0.0000 | 0.0000 | 0.0019 | 0.0368 |
| 3 | USMV | 0.9733 | 0.0000 | 0.4936 | 0.0000 | 0.0000 | 0.5782 | 0.2777 | 0.0000 | 0.0000 | 0.5138 | 0.0206 |
| 4 | PNQI | 0.8414 | 0.0000 | 0.7743 | 0.0000 | 0.1154 | 0.0000 | 0.7860 | 0.0000 | 0.1212 | 0.0000 | 0.9543 |
| 5 | MTUM | 0.8402 | 0.0000 | 0.6650 | 0.0000 | 0.0674 | 0.0000 | 0.1414 | 0.0000 | 0.1593 | 0.0013 | 0.0000 |
| 6 | XBI | 0.4045 | 0.0000 | 0.7551 | 0.0000 | 0.0000 | 0.0000 | 0.5992 | 0.0000 | 0.0000 | 0.0000 | 0.2040 |
| 7 | SPHQ | 0.9862 | 0.0000 | 0.6276 | 0.0000 | 0.0005 | 0.9666 | 0.4836 | 0.0000 | 0.0009 | 0.5019 | 0.1961 |
| 8 | ESGU | 0.1718 | 0.0000 | 0.0176 | 0.0000 | 0.0000 | 0.0157 | 0.0162 | 0.0000 | 0.0000 | 0.0156 | 0.5144 |
| 9 | SECT | 0.1981 | 0.0000 | 0.3003 | 0.0000 | 0.4495 | 0.1070 | 0.1844 | 0.0000 | 0.1612 | 0.0061 | 0.0088 |
| 10 | VUG | 0.8711 | 0.0000 | 0.1676 | 0.0000 | 0.0000 | 0.0000 | 0.1467 | 0.0000 | 0.0000 | 0.0000 | 0.5717 |
| 11 | LRGF | 0.0032 | 0.0000 | 0.0036 | 0.0000 | 0.5829 | 0.0003 | 0.0021 | 0.0000 | 0.3880 | 0.0001 | 0.1531 |
# To analize the R2 from models
models_r2
| index | CAPM | 3FF | 3MFF | |
|---|---|---|---|---|
| 0 | VTV | 0.8825 | 0.9597 | 0.9598 |
| 1 | VGT | 0.8026 | 0.8649 | 0.8649 |
| 2 | VB | 0.8678 | 0.9611 | 0.9625 |
| 3 | USMV | 0.7538 | 0.8001 | 0.8092 |
| 4 | PNQI | 0.6207 | 0.7191 | 0.7191 |
| 5 | MTUM | 0.8131 | 0.8805 | 0.9247 |
| 6 | XBI | 0.3930 | 0.6704 | 0.6751 |
| 7 | SPHQ | 0.9246 | 0.9326 | 0.9336 |
| 8 | ESGU | 0.9867 | 0.9921 | 0.9922 |
| 9 | SECT | 0.9440 | 0.9486 | 0.9555 |
| 10 | VUG | 0.9054 | 0.9652 | 0.9653 |
| 11 | LRGF | 0.9420 | 0.9528 | 0.9540 |
# R2 measures - visualization
fig = px.bar(models_r2, x="index", y="CAPM", title='CAPM R2 for ETF', color_discrete_sequence=px.colors.qualitative.Antique)
fig.show()
fig2 = px.bar(models_r2, x="index", y="3FF", title='3 Factors model FF R2 for ETF', color_discrete_sequence=px.colors.qualitative.Pastel1)
fig2.show()
fig3 = px.bar(models_r2, x="index", y="3MFF", title='3 Factors model FF with Momentum R2 for ETF', color_discrete_sequence=px.colors.qualitative.Dark24)
fig3.show()
# R2 adjusted measures - visualization
fig = px.bar(models_r2a, x="index", y="CAPM", title='CAPM R2 adjusted for ETF', color_discrete_sequence=px.colors.qualitative.Vivid)
fig.show()
fig2 = px.bar(models_r2a, x="index", y="3FF", title='3 Factors model FF R2 adjusted for ETF', color_discrete_sequence=px.colors.qualitative.Pastel2)
fig2.show()
fig3 = px.bar(models_r2a, x="index", y="3MFF", title='3 Factors model FF with Momentum R2 adjusted for ETF', color_discrete_sequence=px.colors.qualitative.Set1)
fig3.show()
# Cleaning the memory
del [fig, fig2, fig3, fig4, fig5, models3FF_r2, df, data, models3FF_r2a]